#########################################################################################################################
### Replication code for "More than Meets the Eye: Disproportionality and Design in Mixed-Member Majoritarian Systems" 


## Authors: Geoff Allen and Matt Jenkins 
## Date: 2025.07.07 

### LOAD REQUIRED PACKAGES 
library(car)
library(tidyverse)
library(coefplot)
library(stargazer)
library(lmtest)
library(sandwich)
library(betareg)

########################
#### READ IN DATA ######

##  Read in SMD and LSI data 
data = read.csv("Data_All_Countries_8.11.2022.csv") 

## Read in LSI data from all systems
pr = read.csv("PR Systems with Gallagher LSI.csv", header=T)
smd = read.csv("SMD Systems with Gallagher LSI.csv", header=T)
golder = rbind(pr, smd)

## Create electoral system var  
golder$system = ifelse(golder$elecrule==1, "SMD", "PR")

## Read in CLEA data 
load(file = "clea_lc_20201216.RData")
clea = clea_lc_20201216

## READ in PR and FPTP LSI data 
lsi_all <- read.csv("LSI_ALL.csv", header=T)


############################################
################## PLOTS ###################

## CODE FOR FIGURE 1: Country LSI by SMD% (without pan, mex, sey, italy, hungary, korea before 04) 

## Exclude Korea prior to 2004 (not MMM)
data1 = data[!(data$Country=="Korea" & data$Year %in% c(1992,1996,2000)),]

## Exclude non-MMM systems 
data2 = data1[!data1$Country %in% c("Mexico","Panama", "Italy", "Seychelles", "Hungary"),]

## First group by country and smd ###########
grp = data2 %>% group_by(Country, percent_maj) %>%
  summarise(LSI = mean(RL_LSI, na.rm=T), 
            ENPP = mean(G_enpp))

##  FIGURE 1 
ggplot(grp, aes(x=percent_maj, y=LSI))+
  geom_point(size=1.5, aes(color=Country))+
  geom_smooth(aes(x=percent_maj, y=LSI), se=F)+
  geom_text(size=3, aes(label=Country, color=Country),hjust=0, vjust=0)+
  scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("Average LSI")

######################################
### ADDITIONAL PLOTS FOR APPENDIX ####

## Add year to country name for clarity
data2$label <- paste0(data2$Country, "_", data2$Year)

## FIG A1: SMD by LSI (non-averaged, same 59 caes in main paper)

ggplot(data2, aes(x=percent_maj, y=RL_LSI))+
  geom_point(size=1.5, aes(color=label))+
  geom_smooth(aes(x=percent_maj, y=RL_LSI), se=F)+
  geom_text(size=3, aes(label=label, color=Country),hjust=0, vjust=0)+
  scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("LSI")+
  theme(legend.position = "none")


## FIGURE A2: SMD by LSI (non-averaged, All 90 mixed-member cases)  

ggplot(data, aes(x=percent_maj, y=RL_LSI))+
  geom_point(size=1.5, aes(color=Country))+
  geom_smooth(aes(x=percent_maj, y=RL_LSI), se=F)+
  geom_text(size=3, aes(label=Country, color=Country),hjust=0, vjust=0)+
  scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("LSI")+
  theme(legend.position = "none")


## FIGURE A3: SMD by LSI (averaged, main cases in Figure 1 except Senegal)  

ggplot(grp[!grp$Country=="Senegal",], aes(x=percent_maj, y=LSI))+
  geom_point(size=1.5, aes(color=Country))+
  geom_smooth(aes(x=percent_maj, y=LSI), se=F)+
  geom_text(size=3, aes(label=Country, color=Country),hjust=0, vjust=0)+
  #scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  #scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("Average LSI")

## FIGURE A4: SMD by LSI (averaged, main cases in Figure 1 except Bulgaria)  

ggplot(grp[!grp$Country=="Bulgaria",], aes(x=percent_maj, y=LSI))+
  geom_point(size=1.5, aes(color=Country))+
  geom_smooth(aes(x=percent_maj, y=LSI), se=F)+
  geom_text(size=3, aes(label=Country, color=Country),hjust=0, vjust=0)+
  scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("Average LSI")


## FIGURE A5: SMD by LSI (averaged, main cases in Figure 1 except Japan)  

ggplot(grp[!grp$Country=="Japan",], aes(x=percent_maj, y=LSI))+
  geom_point(size=1.5, aes(color=Country))+
  geom_smooth(aes(x=percent_maj, y=LSI), se=F)+
  geom_text(size=3, aes(label=Country, color=Country),hjust=0, vjust=0)+
  scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("Average LSI")


## FIGURE A6: SMD by LSI (averaged, main cases in Figure 1 except Korea)  

ggplot(grp[!grp$Country=="Korea",], aes(x=percent_maj, y=LSI))+
  geom_point(size=1.5, aes(color=Country))+
  geom_smooth(aes(x=percent_maj, y=LSI), se=F)+
  geom_text(size=3, aes(label=Country, color=Country),hjust=0, vjust=0)+
  scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("Average LSI")


## FIGURE A7: SMD by LSI (averaged, main cases in Figure 1 except Taiwan)  

ggplot(grp[!grp$Country=="Taiwan",], aes(x=percent_maj, y=LSI))+
  geom_point(size=1.5, aes(color=Country))+
  geom_smooth(aes(x=percent_maj, y=LSI), se=F)+
  geom_text(size=3, aes(label=Country, color=Country),hjust=0, vjust=0)+
  scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("Average LSI")

## FIGURE A8: SMD by LSI (averaged, main cases in Figure 1 except Thailand) 

ggplot(grp[!grp$Country=="Thailand",], aes(x=percent_maj, y=LSI))+
  geom_point(size=1.5, aes(color=Country))+
  geom_smooth(aes(x=percent_maj, y=LSI), se=F)+
  geom_text(size=3, aes(label=Country, color=Country),hjust=0, vjust=0)+
  scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("Average LSI")


## FIGURE A9: SMD by LSI (averaged by country)  

### average by country
  grp2 = data2 %>% group_by(Country) %>%
  summarise(LSI = mean(RL_LSI, na.rm=T), 
            percent_mean = mean(percent_maj))

ggplot(grp2, aes(x=percent_mean, y=LSI))+
  geom_point(size=1.5, aes(color=Country))+
  geom_smooth(aes(x=percent_mean, y=LSI), se=F)+
  geom_text(size=3, aes(label=Country, color=Country),hjust=0, vjust=0)+
  scale_x_continuous(breaks = seq(from=0.1, to=0.90, by=0.10))+
  scale_y_continuous(breaks = seq(from=0, to=30, by=5))+
  xlab("Fraction Nominal Tier Seats")+
  ylab("Average LSI")


#########################################
############### REGRESSIONS  ############

## TABLE 2: MAIN REGRESSION RESULTS 

# BETA REGRESSION 

## Turn DV into proportion ###
data2$LSI_per <- data2$RL_LSI/100

## Create a new variable for SMD_PER
data2$SMD2 <- data2$percent_maj^2

## Main beta model 
mod_beta <- betareg(LSI_per~percent_maj+SMD2, data=data2)

## Export 
stargazer(mod_beta , single.row=T,star.cutoffs = c(0.05,0.01,0.001))

## OLS estimates 
mod_ols = lm(RL_LSI~percent_maj+SMD2, data=data2)
summary(mod_ols)

## Export 
stargazer(mod_ols, single.row = T, star.cutoffs = c(0.05,0.01,0.001))


## Table A2 in Appendix ##

## Main OLS model (same as above) 
m1 = lm(RL_LSI~percent_maj+SMD2, data=data2)

## CODE FOR JACKNIFED OLS ESTIMATES ##

### Get coefs 
out = summary(m1)
out$coefficients
class(out)

## Regular coefs
coef1 = coef(m1)[2]
coef2 = coef(m1)[3]

#### Create vector to contain estimates of slopes
jack.reg1<-numeric(nrow(data2))
jack.reg2<-numeric(nrow(data2))
jack.reg3<-numeric(nrow(data2))
jack.reg4<-numeric(nrow(data2))

### Loop to get jacknifed estimates 
set.seed(100)

for (i in 1:nrow(data2)) {
  model<-lm(RL_LSI[-i]~percent_maj[-i]+SMD2[-i], data=data2)
  model_summary <- as.data.frame(summary(model)$coefficients)
  jack.reg1[i]<-coef(model)[2]
  jack.reg2[i]<-coef(model)[3]
  jack.reg3[i]<-model_summary[2,2]
  jack.reg4[i]<-model_summary[3,2]
}


hist(jack.reg1)
hist(jack.reg2)

mean(jack.reg1)
mean(jack.reg2)
mean(jack.reg3)
mean(jack.reg4)

### Attach jackknifed to real estimates
coef1a = rep(coef1, nrow(data2))
coef2a = rep(coef2, nrow(data2))

est = as.data.frame(cbind(coef1,coef2,jack.reg1, jack.reg2))

### Estimate bias
est$b1 = est$coef1-est$jack.reg1
est$b2 = est$coef2-est$jack.reg2


#### Estimates without Bulgaria ######
ols_no_bulg = lm(RL_LSI~percent_maj+SMD2, data=data2[!data2$Country=="Bulgaria",])
summary(ols_no_bulg )


#########################################################################
##### FIGURE A10: BIAS HISTOGRAM FROM JACKKNIFED OLS ESTIMATES ##########

## Gather ###
est1 = est %>% gather(key="Variable", value="Bias", 5:6)
est1$Var = ifelse(est1$Variable=="b1", "Nominal Tier%", "Nominal Tier%^2")

## Plot
ggplot(est1, aes(Bias))+
  geom_histogram(bins=50)+
  facet_grid(~Var)

################################
###### TABLE A3 IN APPENDIX ####

## Main beta reg model
mod_beta <- betareg(LSI_per~percent_maj+SMD2, data=data2)

## Code for jackniffed estimates 
out = summary(mod_beta)
out$coefficients
class(out)

## Regular coefs
coef1 = coef(mod_beta)[2]
coef2 = coef(mod_beta)[3]

## Create vector to contain estimates of slopes
jack.reg1<-numeric(nrow(data2))
jack.reg2<-numeric(nrow(data2))
jack.reg3<-numeric(nrow(data2))
jack.reg4<-numeric(nrow(data2))

## Loop to get jackknifed estimates 
for (i in 1:nrow(data2)) {
  model<-betareg(LSI_per[-i]~percent_maj[-i]+SMD2[-i], data=data2)
  model_summary <- as.data.frame(summary(model)$coefficients)
  jack.reg1[i]<-coef(model)[2]
  jack.reg2[i]<-coef(model)[3]
  jack.reg3[i]<-model_summary[2,2]
  jack.reg4[i]<-model_summary[3,2]
}


hist(jack.reg1)
hist(jack.reg2)

mean(jack.reg1)
mean(jack.reg2)
mean(jack.reg3)
mean(jack.reg4)

## Attach jackknifed to real estimates
coef1a = rep(coef1, nrow(data2))
coef2a = rep(coef2, nrow(data2))

est = as.data.frame(cbind(coef1,coef2,jack.reg1, jack.reg2))

## Estimate bias
est$b1 = est$coef1-est$jack.reg1
est$b2 = est$coef2-est$jack.reg2


## beta reg estimates without Bulgaria 
m3 = betareg(LSI_per~percent_maj+SMD2, data=data2[!data2$Country=="Bulgaria",])
summary(m3)


### FIGURE A11 ##
 
## Put data into long form
est1 = est %>% gather(key="Variable", value="Bias", 5:6)
est1$Var = ifelse(est1$Variable=="b1", "Nominal Tier%", "Nominal Tier%^2")

## plot
ggplot(est1, aes(Bias))+
  geom_histogram(bins=30)+
  facet_grid(~Var)

####################################################
### Figure A12: COOK'S DISTANCE FOR BETA REG   #####

## Make index ordered
rownames(data2) <- seq_len(nrow(data2))

## Fit beta regression model
model <- betareg(LSI_per ~ percent_maj + SMD2, data = data2)
summary(model)

## Obtain the model matrix (design matrix)
X <- model.matrix(model)

## Extract the fitted values and the Pearson residuals
fitted_values <- fitted(model)
pearson_residuals <- residuals(model, type = "pearson")

## Calculate the leverage (hat values)
hat_values <- diag(X %*% solve(t(X) %*% X) %*% t(X))

## Get the dispersion parameter (scale) from the model
co <- as.data.frame(model$coefficients)
co[1,2]
dispersion <- 1/co

## Calculate the Mean Squared Error (MSE) from the Pearson residuals
mse <- sum(pearson_residuals^2) / (length(pearson_residuals) - length(coef(model)))

## Cook's Distance formula
cooks_dist_manual <- (pearson_residuals^2 / (length(coef(model)) * mse)) * (hat_values / (1 - hat_values)^2)

## Assign the row names of the original dataset to Cook's distance
names(cooks_dist_manual) <- rownames(data2)

## Create a results data frame with Cook's distance and fitted values
results_manual <- data.frame(
  Observation = rownames(data2),
  Fitted = fitted_values,
  Cooks_Distance = cooks_dist_manual
)

## View the results
print(results_manual)

##
p <- length(coef(model))

## FIGURE A12: Cook's Distance for Beta Regression 
plot(cooks_dist_manual, type = "h", main = "Cook's Distance for Beta Regression",
     ylab = "Cook's Distance", xlab = "Observation Number")
abline(h = 4/(nrow(data2)-p), col = "red", lty = 2)  # Threshold line


########################################################
######## ANALYSIS OF CANDIDATE NOMINATION DATA #########

## Select countries in our sample 

cleak = clea[clea$ctr_n %in% c("Korea") & clea$yr %in% c(2004,2008, 2012),]
cleaj = clea[clea$ctr_n %in% c("Japan") & clea$yr %in% c(2009,2012, 2014),]
cleat = clea[clea$ctr_n %in% c("Taiwan")& clea$yr %in% c(2008,2012, 2016),]
cleag = clea[clea$ctr_n %in% c("Georgia")& clea$yr %in% c(2012, 2016),]
cleal = clea[clea$ctr_n %in% c("Lithuania")& clea$yr %in% c(2004,2008, 2012),]

## Put back together
clea1 = rbind(cleak, cleaj, cleat,cleal, cleag)

## Keep only columns that we need  33 is seats won
clea2 = clea1[, c(4, 6, 9, 10, 11, 12, 13, 33)]


## Summarize data ##

## NOTE: 0001-3999. Political parties     4000. "Others" (i.e., more than two small parties are grouped) 
## 6000. "Independents" (i.e.,
## 5001-5999. Electoral coalitions or alliances between political parties

## Get unique rows only so that you don't count duplicate candidates 
kdat_comp = distinct(clea2)

## Create party type var 
kdat_comp$type = ifelse(kdat_comp$pty>= 1 & kdat_comp$pty<= 3999, "PARTY", 
                        ifelse(kdat_comp$pty> 3999 & kdat_comp$pty<= 5999, "OTHER", 
                               ifelse(kdat_comp$pty> 5999, "IND", NA)))


## Get number of candidates and number of wins, nominations
dats = kdat_comp[kdat_comp$cst<=900,] %>% group_by(ctr_n, yr, pty, type) %>%
  summarize(CANDS_NOM = (length(pty)-1), 
            wins = length(which(seat==1)))

## calculate total number of districts for each year (seats available)
# remove cst greater than 900 bc that's pr  
dists = kdat_comp[kdat_comp$cst<=900,] %>% group_by(ctr_n, yr) %>%
  summarize(DISTN = length(unique(cst)))

## merge 
dats_m = merge(dats, dists, by=c("ctr_n", "yr"))

## Calculate percent of districts in which a candidate was nominated by each party
dats_m$per_nom = dats_m$CANDS_NOM/dats_m$DISTN

##  Calculate the win percentage where for dist in which candidate was nominated
dats_m$win_per = dats_m$wins/(dats_m$CANDS_NOM)


## Add SMD% variable
dats_m$SMD_PER = ifelse(dats_m$ctr_n=="Korea", 80, ifelse(dats_m$ctr_n=="Japan", 63, 
                                                          ifelse(dats_m$ctr_n=="Taiwan", 70, 
                                                                 ifelse(dats_m$ctr_n=="Georgia", 50,
                                                                        ifelse(dats_m$ctr_n=="Lithuania", 50, 50)))))

## Add pr vote data 
pr = read.csv("Parties with ID Numbers and PR Vote Share.csv", header=T)

## Change party name column 
dats_m$partynumber = dats_m$pty
dats_m$country = dats_m$ctr_n
dats_m$year = dats_m$yr

## Merge district vote data with pr vote data 
datos = merge(dats_m,pr, by=c("partynumber", "country", "year"))

## Gather size var 
datos$size = ifelse(datos$large==1, "Large", ifelse(datos$medium==1, "Medium", "Small"))
datos$size <- factor(datos$size, levels = c("Small", "Medium", "Large"))

## Make SMD fator 
datos$SMD = as.factor(datos$SMD_PER)

#### Add Taiwan TSU
a = as.data.frame(pr[pr$country=="Taiwan" & pr$year=="2012" & pr$partynumber==16,])
a$size = ifelse(a$large==1, "Large", ifelse(a$medium==1, "Medium", "Small"))
a$win_per = 0 
a$per_nom = 0
a$type = "PARTY"
a$CANDS_NOM = 0
a$SMD = as.factor(70)
a$SMD_PER = 70
a$pty  = 16
a$DISTN = unique(datos$DISTN[datos$country=="Taiwan" & datos$year=="2012"])
a$wins = 0
a$ctr_n  = "Taiwan"  
a$yr = 2012

### Rbind to datos
datos1 = as.data.frame(bind_rows(datos, a))

## Set factor levels 
datos1$size <- factor(datos1$size, levels = c("Small", "Medium", "Large")) 


#########################################################
## MAIN REGRESSIONS MODELS FOR CANDIDATE NOMINATION DATA  

#################
#### TABLE 3 ####

## Create SMD tier proportion var 
datos1$SMD_FAC = as.factor(datos1$SMD_PER)

model1 = lm(per_nom~SMD_FAC+yr, data=datos1[!datos1$size=="Large",])
summary(model1)


#################
### TABLE A4 ####

## Quadratic model (OLS)  

## Remove Georgia
derts <- datos1[!datos1$country=="Georgia",]

## Make SMD numeric for quadratic models
derts$SMD_NUM <- as.numeric(ifelse(derts$SMD=="50", 50, 
                                   ifelse(derts$SMD=="70", 70,
                                          ifelse(derts$SMD=="80", 80,
                                              ifelse(derts$SMD=="63", 63, NA)))))

## Add square to data set
derts$SMD_NUM2 <- derts$SMD_NUM^2
derts$SMD_NUM_PER = derts$SMD_NUM/100
derts$SMD_NUM_PER2 = derts$SMD_NUM_PER^2

## Estimate models
md5a = lm(per_nom~SMD_NUM+yr, data=derts[!derts$size=="Large",])
md5b = lm(per_nom~SMD_NUM+SMD_NUM2+yr, data=derts[!derts$size=="Large",])

summary(md5a)
summary(md5b)

## Prevent division by 0 
derts$per_nom1 <- derts$per_nom+0.00000001

## Estimate beta regressions with and w/o quadratic
beta_a = betareg(per_nom1~SMD_NUM+yr, data=derts[!derts$size=="Large",])
beta_b = betareg(per_nom1~SMD_NUM+SMD_NUM2+yr, data=derts[!derts$size=="Large",])

summary(beta_a)
summary(beta_b)

## Export
stargazer(md5a,md5b,beta_a, beta_b, single.row = T)


########################################################################
### TABLE A5: Nomination Percentage Regressions with Lagged PR Vote ####

model_a5 = lm(per_nom~SMD_FAC*lag(prvote)+yr, data=datos1[!datos1$size=="Large",])
summary(model_a5)

## Export
stargazer(model_a5)

## Main plot of candidate nominations by pr vote 

## Convert inf to finite 
is.na(datos1)<-sapply(datos1, is.infinite)


## Remove Georgia
datos2 = datos1[!datos1$country=="Georgia" ,]

## FIGURE A14: SMALL PARTIES

datos2[datos2$type=="PARTY" & datos2$size=="Small" ,] %>% mutate(per_nom = coalesce(per_nom, 0))%>%
  ggplot(aes(x=prvote, y=per_nom, shape=SMD))+
  geom_point()+
  facet_wrap(~size, scales = "free")+
  xlab("National PR Tier Vote Share")+
  ylab("Percent of Districts with Nomination")

## FIGURE A15: Medium Parties 

datos2[datos2$type=="PARTY" & datos2$size=="Medium" ,] %>% mutate(per_nom = coalesce(per_nom, 0))%>%
  ggplot(aes(x=prvote, y=per_nom, shape=SMD))+
  geom_point()+
  facet_wrap(~size, scales = "free")+
  xlab("National PR Tier Vote Share")+
  ylab("Percent of Districts with Nomination")


#### Figure A13: PR VS SMD FPTP LSI 

## gather ###
lsi_comp <- gather(lsi_all, key="system_type", value="lsi", 1:2)

## plot
ggplot(lsi_comp, aes(x=system_type, y=lsi))+
  geom_violin()+
  geom_jitter()+
  ylab("LSI (Vote-seat Disproportionality")+
  xlab("System Type")



#####################################
######## SIMULATION CODE ############

## Set parameters
set.seed(101)
sims <- 10000
spill <- 5
epsilon <- 1e-4
prvote_big = pr$prvote

## Create variable shells
tot.seats <- rep(NA,sims)
smd.seats <- rep(NA,sims)
pr.seats <- rep(NA,sims)
smd.tier <- rep(NA, sims)
pr.tier <- rep(NA, sims)
pr.vote <- rep(NA, sims)
opp.cost <- rep(NA, sims)
smd.win.per <- rep(NA, sims)
noms <-  rep(NA, sims)
spill.per =rep(NA, sims)

#### Remove missing data for reg model 
datos1_clean <- subset(datos1, !is.na(win_per) & !is.na(prvote) &
                         !is.nan(win_per) & !is.nan(prvote) &
                         !is.infinite(win_per) & !is.infinite(prvote))

## Simulation for Loop
for (j in 1:sims){

  ### Draw tier sizes
  pr.per = sample(c(50,40,30, 20), 1)## simulate pr tier size
  smd.per = 100-pr.per ##smd tier size, legislature = 100
  pr_vote_base = sample(prvote_big, 1) ### sample base pr vote 
 
  ### Calculate model to get smd win per
  myglm <- lm(win_per~prvote, data = datos1_clean)
  dat=as.data.frame(cbind(pr_vote_base, smd.per))
  colnames(dat)[1] = "prvote"
  colnames(dat)[2] = "SMD_PER"
  dat$prsq = dat$prvote^2
  smd.win.per <- abs(predict(myglm, newdata=dat))
  
  ## Draw number of candidates
  max_cand = floor(ifelse(pr_vote_base<smd.per,ceiling(pr_vote_base), smd.per))
  m = sample(seq(from=0, to=max_cand, by=1), 1) ## num of cands to nom, 0 up to total pr base (total resources)
 
  ## Simulate wins based on smd win per and m
  D <- rbinom(m, size=1, prob=smd.win.per)#### generate wins with prob = smd.win.per
  
  ## Add up wins and prvote and get total seats
  smd_wins = sum(D) ## total wins
  spill_per = (((spill/smd.per)*m)/100)
  pr_vote_tot = (pr_vote_base + ((spill/smd.per)*m))
  pr_seats = floor((pr_vote_tot/100)*pr.per)##pr vote total is base plus spillover for M
  tot_seats = (smd_wins + pr_seats)
  opp_cost = ((m-smd_wins)/(m+epsilon)*pr_vote_base) ## opp cost is diff between win per and resources invested (add 0.0001 to avoid division by 0)
  
  ### Obtain values
  tot.seats[j] <- tot_seats
  smd.seats[j] <- smd_wins
  pr.seats[j] <- pr_seats
  smd.tier[j] <-smd.per
  pr.tier[j] <- pr.per
  pr.vote[j] <-pr_vote_base 
  opp.cost[j] <- opp_cost
  smd.win.per[j] = smd.win.per
  noms[j] = m
  spill.per[j] = spill_per
}
### End sim loop ##


###########################################
########### SIMULATION PLOTS ##############

## Gather simulation data
sim_dat = as.data.frame(cbind(tot.seats, smd.seats, pr.seats,smd.tier, pr.tier,pr.vote, opp.cost, noms, spill.per))


## Group and average by pr vote and SMD tier 
avg_sims = sim_dat %>% group_by(pr.vote, smd.tier) %>%
  summarise(seats = mean(tot.seats),
            sd = sd(tot.seats), 
            cost=mean(opp.cost), 
            sd = sd(opp.cost), 
            nom = mean(noms), 
            spills = mean(spill.per))

## Create party size var 
avg_sims$size = ifelse(avg_sims$pr.vote>=0 &avg_sims$pr.vote<6,"SMALL", 
                       ifelse(avg_sims$pr.vote>=6 &avg_sims$pr.vote< 20,"MEDIUM","LARGE"))




###  PRIMARY PLOT FOR SIMS: NOMINATIONS AND EXPECTED SEATS ##

## Make tier size a factor 
avg_sims$SMD_PER = as.factor(avg_sims$smd.tier)


## FIGURE 2: PLOT FOR SMALL/MEDIUM PARTIES 

ggplot(avg_sims[!avg_sims$size=="LARGE",], aes(x = nom, y=seats, group=SMD_PER))+
  geom_smooth(aes(linetype=SMD_PER), se=T)+
  xlab("Number of Candidates Nominated")+
  coord_cartesian(xlim=c(0,10),ylim=c(0,13) )+
  ylab("Expected Number of Seats")+
  scale_x_continuous(breaks=seq(0,10,1))+
  scale_y_continuous(breaks=seq(0,13,1))+
  guides(linetype=guide_legend(title="% Nominal Tier Seats"))

## FIGURE 3:  PLOT FOR SMALL PARTIES 

ggplot(avg_sims[avg_sims$size=="SMALL",], aes(x = nom, y=seats, group=SMD_PER))+
  geom_smooth(aes(linetype=SMD_PER), se=T)+
  xlab("Number of Candidates Nominated")+
  coord_cartesian(xlim=c(0,4),ylim=c(0,4) )+
  ylab("Expected Number of Seats")+
  scale_x_continuous(breaks=seq(0,4,1))+
  scale_y_continuous(breaks=seq(0,4,1))+
  guides(linetype=guide_legend(title="% Nominal Tier Seats"))

## FIGURE 4:  PLOT FOR MEDIUM PARTIES 

ggplot(avg_sims[avg_sims$size=="MEDIUM",], aes(x = nom, y=seats, group=SMD_PER))+
  geom_smooth(aes(linetype=SMD_PER), se=T)+
  xlab("Number of Candidates Nominated")+
  coord_cartesian(xlim=c(0,10),ylim=c(0,14) )+
  ylab("Expected Number of Seats")+
  scale_x_continuous(breaks=seq(0,10,1))+
  scale_y_continuous(breaks=seq(0,14,1))+
  guides(linetype=guide_legend(title="% Nominal Tier Seats"))


############################################
################### END ####################